In [1]:
import ecco
import torch
import json
import pandas as pd
In [2]:
# Release any cached CUDA memory held by this kernel before loading the model.
torch.cuda.empty_cache()

# Load the language model through ecco so attributions can be requested later.
model_name = 'gpt2'
lm = ecco.from_pretrained(model_name, verbose=False)

# Report which device is available; `device` is only printed in this notebook.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)
cuda
In [3]:
# Read the evaluation samples: one JSON object per line (JSON Lines file).
test_data = []
with open('data/saliency_test.jsonl', 'r', encoding='utf-8') as inf:
    for raw_line in inf:
        record_text = raw_line.strip()
        # Skip blank lines (e.g. an empty line at the end of the file);
        # json.loads('') would otherwise raise JSONDecodeError.
        if record_text:
            test_data.append(json.loads(record_text))
In [4]:
# Few-shot demonstration prompts come from the local `short_demos` module.
#
# Provenance note: an earlier version of this cell built the prompt on the fly
# from "model_answers.csv" (rows 20, 50, 108, 83, 15, 33, 93, 120, formatted as
# "Review: <text>\nLabel: <label>\n\n", optionally with positive/negative
# swapped). That commented-out helper was dead code and has been dropped in
# favour of the pre-built prompts imported below.
#
# Names are imported explicitly (instead of `import *`) so it is clear where
# each prompt used later in the notebook is defined.
from short_demos import original1, flipping1, perturbation1, complementary1

print(original1)
Review: straining to get by on humor that is not even as daring as john ritter 's glory days on three 's company . 
label: negative
Review: , serves as a paper skeleton for some very good acting , dialogue , comedy , direction and especially charm . 
label: positive
Review: a whole lot of fun and funny in the middle , though somewhat less hard-hitting at the start and finish . 
label: positive
Review: might have been saved if the director , tom dey , had spliced together bits and pieces of midnight run and 48 hours ( and , for that matter , shrek ) 
label: negative

Flip Label¶

In [5]:
def _attribution_record(full_prompt, gold_label, attr_method):
    """Generate one token for ``full_prompt`` and collect its attributions.

    Uses the notebook-level ``lm`` object (the ecco-wrapped model).

    Args:
        full_prompt: Complete prompt text (demonstrations followed by the query).
        gold_label: Reference label of the test sample; stored unchanged.
        attr_method: Attribution method key passed to ecco (e.g. ``'ig'``).

    Returns:
        dict with keys ``tokens``, ``attributions``, ``label`` and
        ``prediction``; the caller serialises it with ``json.dumps``.
    """
    output = lm.generate(full_prompt, generate=1, do_sample=False, attribution=[attr_method])
    # Kept from the original cell; the return value is unused.
    # NOTE(review): presumably this renders ecco's saliency view inline — confirm.
    output.primary_attributions(attr_method=attr_method, style='minimal')
    return {
        'tokens': output.tokens,
        'attributions': [att.tolist() for att in output.attribution[attr_method]],
        'label': gold_label,
        'prediction': output.output_text
    }


def generate_saliency_map(contrastive_prompt, save_indicator, model_name='gpt2', label_cue='label:'):
    """Write attribution records for the original vs. a contrastive prompt.

    For every sample in the notebook-level ``test_data`` the model (``lm``) is
    run twice, once with ``original1`` as demonstrations and once with
    ``contrastive_prompt``. Both records are written, in that order, as two
    lines of
    ``results/model_<model_name>-attr_ig-perturbate_<save_indicator>-idx_<idx>.jsonl``.

    Args:
        contrastive_prompt: Demonstration text to compare against ``original1``.
        save_indicator: Tag embedded in the output file name.
        model_name: Used ONLY to build the output file name ('/' is replaced
            by '-'); the model actually queried is always the global ``lm``.
        label_cue: Text placed after the test review to elicit the answer.
            Defaults to lowercase ``'label:'`` so the query matches the
            ``label:`` lines of the demonstrations; the previous hard-coded
            ``'Label:'`` differed in case from them. Pass ``'Label:'`` to
            reproduce earlier runs.

    Raises:
        AssertionError: if a test sample's ``idx`` is one of the reserved
            demonstration indices.
    """
    import os  # local import: keeps this cell self-contained

    torch.cuda.empty_cache()
    attr_method = 'ig'
    file_model_tag = model_name.replace('/', '-')
    # Row indices that an earlier version of this notebook used to build the
    # demonstrations. NOTE(review): presumably this guards against a test
    # sample leaking into the prompt — confirm that `idx` uses the same indexing.
    demo_indices = (20, 50, 108, 83, 15, 33, 93, 120)
    # Without this, open(..., 'w') fails with FileNotFoundError on a fresh checkout.
    os.makedirs('results', exist_ok=True)

    for ele in test_data:
        print("="*100)
        review = ele['sentence']
        idx = ele['idx']
        print(f"Test sample {idx}")
        assert idx not in demo_indices, f"test sample {idx} is also a demonstration index"

        # The same query suffix is appended to both demonstration blocks.
        query = f"Review: {review}\n{label_cue}"
        baseline_record = _attribution_record(f"{original1}{query}", ele['label'], attr_method)
        contrastive_record = _attribution_record(f"{contrastive_prompt}{query}", ele['label'], attr_method)

        out_path = f'results/model_{file_model_tag}-attr_{attr_method}-perturbate_{save_indicator}-idx_{idx}.jsonl'
        with open(out_path, 'w') as outf:
            outf.write(f"{json.dumps(baseline_record)}\n")
            outf.write(f"{json.dumps(contrastive_record)}\n")
            
# Compare the original demonstrations with `flipping1`; output files are tagged "flip".
generate_saliency_map(contrastive_prompt=flipping1, save_indicator='flip')
/fs/clip-projects/clip-causal/anaconda3/envs/xicl/lib/python3.8/site-packages/transformers/generation/utils.py:1255: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)
  warnings.warn(
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 175
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 691
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 13
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 857
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 649
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 451
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 253
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 829
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 610
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 257
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 742
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 744
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 161
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 594
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 509
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 781
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 611
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 605
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 475
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 512
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.

Input Perturbation¶

In [6]:
# Compare the original demonstrations with `perturbation1`; output files are tagged "neutral".
generate_saliency_map(contrastive_prompt=perturbation1, save_indicator='neutral')
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 175
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 691
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 13
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 857
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 649
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 451
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 253
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 829
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 610
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 257
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 742
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 744
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 161
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 594
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 509
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 781
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 611
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 605
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 475
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 512
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.

Complementary Explanation¶

In [7]:
# Compare the original demonstrations with `complementary1`; output files are tagged "explain".
generate_saliency_map(contrastive_prompt=complementary1, save_indicator='explain')
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 175
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 691
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 13
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 857
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 649
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 451
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 253
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 829
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 610
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 257
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 742
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 744
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 161
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 594
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 509
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 781
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 611
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 605
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 475
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
====================================================================================================
Test sample 512
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
In [ ]: